*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*	Estimate sampling variances and covariances for simulations
*	----------------------------------------------------------------------------
	args sample sch_res ptype bw
	tokenize `sample', parse("_")
	local years "`5'`6'`7'`8'"

	local tests m

*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

*	hyperparameter calculations for contemporaneous OLS VAMs

	use "${builddata}${city}_analysisfile`sample'`sch_res'`bw'.dta", clear

	* rename older lagged scores
	if "${city}"=="NYC" ren (bl_ss_math_6 bl_ss_ela_6) (bl_ss_math_lag bl_ss_ela_lag)
	if "${city}"=="NYCms" ren (bl_ss_math_3 bl_ss_ela_3) (bl_ss_math_lag bl_ss_ela_lag)

	* order schools
	preserve

		local D enr_????

		if "${city}" == "NYC" {
			drop enr_4555
			drop enr_2337
			drop enr_2583
			drop enr_3459
			drop enr_3552
			drop enr_6334
			drop enr_6365
			drop enr_6403
			drop enr_6524
			drop enr_6564
			drop enr_6388
		}
		if "${city}" == "NYCms" {
			drop enr_6593
			drop enr_3349
		}

		_rmcoll `D' if lottery_`ptype'_sample, forcedrop nocons
			local D = r(varlist)

		gen tag = 0
		foreach var of varlist `D'{
			local sch = substr("`var'",-4,.)
			replace tag = 1 if sch == `sch'
		}
		keep if tag

		keep sch lottery_`ptype'_sch omitted
		duplicates drop

		//lotteries first
		gsort omitted -lottery_`ptype'_sch sch
		gen newnum = _n

		//count schools
		local J=_N
		count if lottery_`ptype'_sch & !omitted
		local L=r(N)

		//store ordering in locals
		forval i=1/`J'{
			qui sum sch if newnum == `i'
			local oldnum_`i' = r(mean)
			di in red "`oldnum_`i''"
		}

	restore

	local D
	local enrvar enr
	forval k=1/1{
		forval i=1/`J'{
			gen `enrvar'_`oldnum_`i''_`k' = `enrvar'_`oldnum_`i''
			local D `D' `enrvar'_`oldnum_`i''_`k'
		}
	}

	* estimate VAM and calculate variances

	foreach y in `tests' {

		ivset, y(`y') ptype(`ptype')
			local Y=r(Y)
			local p=r(P)
			local S=r(S)

		_rmcoll `D' if lottery_`ptype'_sample, forcedrop nocons
			local D = r(varlist)

		* estimate VAMs
		foreach vam in unc conv risk rcvam {
			* set controls
			if "`vam'" == "unc" local controls i.year##i.grade
			if "`vam'" == "conv" local controls i.year##(i.grade c.($bl_demos) c.($bl_scores))
			if "`vam'" == "risk" local controls i.year##i.grade `p'
			if "`vam'" == "rcvam" local controls i.year##(i.grade c.($bl_demos) c.($bl_scores)) `p'

			* remove collinear
			_rmcoll `D' `controls', expand nocons
			local varlist = r(varlist)
			local cov
			foreach var in `varlist' {
				if strpos("`var'","o.") == 0 local cov `cov' `var'
			}

			* sort
			cap drop miss
			cap g miss = mi(bl_ss_math_lag) | mi(bl_ss_ela_lag)
			so miss stu
			merge 1:1 stu using "${builddata}${city}_analysisfile`sample'`sch_res'`bw'_older_bl", gen(delta)
			qui count if delta == 1 // in main, not older
			local delta = r(N)
			drop if delta == 2
			drop delta

			* estimate VAM and save residuals
			qui _regress `Y' `cov', nocons
			local `vam'_N = e(N)
			local `vam'_df_m = e(df_m)
			predict `vam'_resids, r

			* store design matrix and residuals (sort on student first)
			mata `vam'_X = st_data(., ("`cov'"))
			mata `vam'_e = st_data(.,"`vam'_resids")

			* store alphas and var(hat{alpha})
			mata `vam'_alphas = st_matrix("e(b)")
			mata `vam'_alphas = `vam'_alphas[1..1,1..`J']'
			mata `vam'_sig2_a = variance(`vam'_alphas)
		}

		* get sampling variance
		foreach vam in unc conv risk rcvam {
			mata `vam'_V = invsym(`vam'_X'*`vam'_X)*(`vam'_e'*`vam'_e)/(``vam'_N'-``vam'_df_m')
			mata `vam'_V = `vam'_V[1..`J',1..`J']
		}

		* get sampling covariance with RC
		foreach vam in unc conv risk {
			mata `vam'_rcvam_cov = invsym(`vam'_X'*`vam'_X)*(`vam'_X'*rcvam_X)*invsym(rcvam_X'*rcvam_X)*(`vam'_e'*rcvam_e)/(``vam'_N'-`rcvam_df_m'-``vam'_df_m')
			mata `vam'_rcvam_cov = `vam'_rcvam_cov[1..`J',1..`J']
		}

		* get sampling covariance with C
		foreach vam in unc risk {
			mata `vam'_conv_cov = invsym(`vam'_X'*`vam'_X)*(`vam'_X'*conv_X)*invsym(conv_X'*conv_X)*(`vam'_e'*conv_e)/(``vam'_N'-`conv_df_m'-``vam'_df_m')
			mata `vam'_conv_cov = `vam'_conv_cov[1..`J',1..`J']
		}

		* get sampling covariance with risk only
		foreach vam in unc {
			mata `vam'_risk_cov = invsym(`vam'_X'*`vam'_X)*(`vam'_X'*risk_X)*invsym(risk_X'*risk_X)*(`vam'_e'*risk_e)/(``vam'_N'-`risk_df_m'-``vam'_df_m')
			mata `vam'_risk_cov = `vam'_risk_cov[1..`J',1..`J']
		}

		* store
		foreach vam in unc conv risk rcvam {
			mata st_matrix("`vam'_alphas",`vam'_alphas)
			mata st_matrix("`vam'_sig2_a",`vam'_sig2_a)
			mata st_matrix("`vam'_V",`vam'_V)
		}
		foreach vam in unc_rcvam conv_rcvam risk_rcvam unc_conv risk_conv unc_risk {
			mata st_matrix("`vam'_cov",`vam'_cov)
		}

		foreach vam in unc conv risk rcvam {
			preserve
			clear
			svmat `vam'_V
			export delimited "${builddata}${city}_`ptype'_alpha_`vam'_sampling_var_`y'.csv", novarnames replace
			restore
		}
		foreach vam in unc_rcvam conv_rcvam risk_rcvam unc_conv risk_conv unc_risk {
			preserve
				clear
				mata st_matrix("`vam'_cov",`vam'_cov)
				svmat `vam'_cov
				export delimited "${builddata}${city}_`ptype'_alpha_`vam'_sampling_cov_`y'.csv", novarnames replace
			restore
		}

		* corresponding calculations for VAMs w/ older lags

			* load
			u "${builddata}${city}_analysisfile`sample'`sch_res'`bw'_older_bl", clear

			* set vars
			local D
			local enrvar enr
			forval k=1/1{
				forval i=1/`J'{
					gen `enrvar'_`oldnum_`i''_`k' = `enrvar'_`oldnum_`i''
					local D `D' `enrvar'_`oldnum_`i''_`k'
				}
			}

			ivset, y(m) ptype(`ptype')
				local Y=r(Y)
				local p=r(P)

			local bl_older_scores $bl_scores

			* sort (kids in baseline sample first)
			cap drop ind
			cap g ind = !mi(bl_math_1yrlag) & !mi(bl_ela_1yrlag)
			gsort -ind stu
			qui count if stu
			local N = r(N)
			merge 1:1 stu using "${builddata}${city}_analysisfile`sample'`sch_res'`bw'.dta", gen(main_merge)
			g add = main_merge == 1
			drop if main_merge == 2
			drop main_merge
			qui count if add
			local add = r(N)

			* estimate conventional with older lags
			_rmcoll `D' i.year##(i.grade c.($bl_demos) c.(`bl_older_scores')), expand
			local varlist = r(varlist)
			local cov
			foreach var in `varlist' {
				if strpos("`var'","o.") == 0 local cov `cov' `var'
			}
			qui _regress `Y' `cov', nocons
			local conv_3yrlag_N = e(N)
			local conv_3yrlag_df_m = e(df_m)
			predict conv_3yrlag_resids, r
			mata conv_3yrlag_X = st_data(., ("`cov'"))
			mata conv_3yrlag_e = st_data(.,"conv_3yrlag_resids")
			local count = _N

			* estimate RC with older lags
			_rmcoll `D' `p' i.year##(i.grade c.($bl_demos) c.(`bl_older_scores')), expand
			local varlist = r(varlist)
			local cov
			foreach var in `varlist' {
				if strpos("`var'","o.") == 0 local cov `cov' `var'
			}
			qui _regress `Y' `cov', nocons
			local rcvam_3yrlag_N = e(N)
			local rcvam_3yrlag_df_m = e(df_m)
			predict rcvam_3yrlag_resids, r
			mata rcvam_3yrlag_X = st_data(., ("`cov'"))
			mata rcvam_3yrlag_e = st_data(.,"rcvam_3yrlag_resids")
			local count = _N

			* fill out contemporaneous vam matrices
			* append zeros for kids who have older lags but not closer lags
			foreach vam in unc conv risk rcvam {
				mata add = J(`add',cols(`vam'_X),0)
				mata `vam'_X = `vam'_X \ add
				mata add = J(`add',1,0)
				mata `vam'_e = `vam'_e \ add
			}

			* fill out older lag vam matrices
			* sandwich zeros for kids who have older lags but not closer lags
			foreach vam in conv_3yrlag rcvam_3yrlag {
				mata top = `vam'_X[1..`=`N'-`add'',1..cols(`vam'_X)]
				mata bot = `vam'_X[`=`N'-`add'+1'..rows(`vam'_X),1..cols(`vam'_X)]
				mata delta = J(`delta',cols(`vam'_X),0)
				mata `vam'_X = top \ delta \ bot
				mata top = `vam'_e[1..`=`N'-`add'',1]
				mata bot = `vam'_e[`=`N'-`add'+1'..rows(`vam'_e),1]
				mata delta = J(`delta',1,0)
				mata `vam'_e = top \ delta \ bot
			}

			* sampling variance
			mata conv_3yrlag_V = invsym(conv_3yrlag_X'*conv_3yrlag_X)*(conv_3yrlag_e'*conv_3yrlag_e)/(`conv_3yrlag_N'-`conv_3yrlag_df_m')
			mata conv_3yrlag_V = conv_3yrlag_V[1..`J',1..`J']
			mata conv_3yrlag_V = mean(diagonal(conv_3yrlag_V))
			mata rcvam_3yrlag_V = invsym(rcvam_3yrlag_X'*rcvam_3yrlag_X)*(rcvam_3yrlag_e'*rcvam_3yrlag_e)/(`rcvam_3yrlag_N'-`rcvam_3yrlag_df_m')
			mata rcvam_3yrlag_V = rcvam_3yrlag_V[1..`J',1..`J']
			mata rcvam_3yrlag_V = mean(diagonal(rcvam_3yrlag_V))

			* get sampling covariance with conventional older lags
			foreach vam in unc conv risk rcvam rcvam_3yrlag {
				mata `vam'_conv_3yrlag_cov = invsym(`vam'_X'*`vam'_X)*(`vam'_X'*conv_3yrlag_X)*invsym(conv_3yrlag_X'*conv_3yrlag_X)*(`vam'_e'*conv_3yrlag_e)/(``vam'_N'-`conv_3yrlag_df_m'-``vam'_df_m')
				mata `vam'_conv_3yrlag_cov = `vam'_conv_3yrlag_cov[1..`J',1..`J']
			}

			* get sampling covariance with RC older lags
			foreach vam in unc conv risk rcvam {
				mata `vam'_rcvam_3yrlag_cov = invsym(`vam'_X'*`vam'_X)*(`vam'_X'*rcvam_3yrlag_X)*invsym(rcvam_3yrlag_X'*rcvam_3yrlag_X)*(`vam'_e'*rcvam_3yrlag_e)/(``vam'_N'-`rcvam_3yrlag_df_m'-``vam'_df_m')
				mata `vam'_rcvam_3yrlag_cov = `vam'_rcvam_3yrlag_cov[1..`J',1..`J']
			}

		* store
		foreach vam in unc_conv_3yrlag conv_conv_3yrlag risk_conv_3yrlag rcvam_conv_3yrlag rcvam_3yrlag_conv_3yrlag {
			mata st_matrix("`vam'_cov",`vam'_cov)
		}
		foreach vam in unc_rcvam_3yrlag conv_rcvam_3yrlag risk_rcvam_3yrlag rcvam_rcvam_3yrlag {
			mata st_matrix("`vam'_cov",`vam'_cov)
		}
		foreach vam in unc_conv_3yrlag conv_conv_3yrlag risk_conv_3yrlag rcvam_conv_3yrlag rcvam_3yrlag_conv_3yrlag unc_rcvam_3yrlag conv_rcvam_3yrlag risk_rcvam_3yrlag rcvam_rcvam_3yrlag {
			preserve
				clear
				mata st_matrix("`vam'_cov",`vam'_cov)
				svmat `vam'_cov
				export delimited "${builddata}${city}_`ptype'_alpha_`vam'_sampling_cov_`y'.csv", novarnames replace
			restore
		}

		}
